library(stringr)
library(gtools)
library(plyr) #must load plyr first for summarise() to work
library(dplyr)

# Prefix pasted directly in front of every input/output path below, so a
# non-empty value must carry its own trailing "/".
the_prefix <- ""

#### KFF demos
####
# Load the KFF respondent data; the rest of the script expects this .Rdata
# file to define a data frame called `sdat`.
load(paste0(the_prefix, "kff-joint-nogeo-12122018.Rdata"))

# Numeric copy of AGE (via character, so factor codes are not used by mistake)
# and 0/1 flags for PID == 1 (DEM) and PID == 2 (IND).
sdat$AGEN <- as.numeric(as.character(sdat$AGE))
sdat$DEM <- 1 * (sdat$PID == 1)
sdat$IND <- 1 * (sdat$PID == 2)

# Lower-cased survey-wave label -> first day of that wave's month.
wave_dates <- c(
    jul09 = "2009-07-01",
    nov09 = "2009-11-01",
    may10 = "2010-05-01",
    oct10 = "2010-10-01",
    mar11 = "2011-03-01",
    jul11 = "2011-07-01",
    nov11 = "2011-11-01",
    mar13 = "2013-03-01",
    dec13 = "2013-12-01",
    mar14 = "2014-03-01",
    mar15 = "2015-03-01",
    jan16 = "2016-01-01",
    jul16 = "2016-07-01",
    oct16 = "2016-10-01"
)

# Stored as character so it can serve as a join key later on; labels that are
# not in the lookup are left unchanged by recode().
sdat$date <- as.character(
    dplyr::recode(factor(tolower(sdat$DATE)), !!!wave_dates)
)




#### KFF text
####
## edit location of text files
thewd <- paste0(the_prefix, "openendallpublic/")

# Paths relative to `thewd`, searched recursively.
thefiles <- list.files(thewd, include.dirs = TRUE, recursive = TRUE)

# Read one open-ended response file into a one-row data frame.
#
# The file name is split on "-" and its pieces are used, in order, as
# date, psraid, row and acafavor (with the ".txt" suffix stripped from the
# last piece). Missing pieces become NA. All lines of the file are collapsed
# into a single whitespace-trimmed string.
read_open_end <- function(file_name) {
    parts <- strsplit(file_name, "-")[[1]]
    data.frame(
        text = str_trim(
            paste(readLines(paste0(thewd, file_name)), collapse = " ")
        ),
        date = parts[1],
        psraid = parts[2],
        row = parts[3],
        acafavor = sub(".txt", "", parts[4], fixed = TRUE),
        stringsAsFactors = FALSE
    )
}

cat("\nReading text files..\n")
# An empty directory used to surface as a confusing attempt to read
# "<dir>/NA" (1:length(x) counts 1, 0 when x is empty); say so explicitly.
if (length(thefiles) == 0) {
    warning("No open-ended text files found in ", thewd, call. = FALSE)
}

alldat <- lapply(thefiles, read_open_end)

# Stack the one-row data frames into a single data frame.
df <- ldply(alldat, data.frame)

# Additional coded open-ended responses kept as a CSV under openendedcodes/;
# only the first three columns are used (renamed psraid, acafavor, text).
addfile <- "oct16Amat121016TM.csv"
# paste0() has no `sep` argument -- the former `sep = ""` was silently pasted
# as one more (empty) string -- so it is dropped here.
text <- read.csv(paste0(the_prefix, "openendedcodes/", addfile))[, 1:3]
names(text) <- c("psraid", "acafavor", "text")
text$text <- as.character(text$text)

# Derive the five-character wave label (e.g. "oct16") from the file name:
# strip known path/name fragments, shorten "march" to "mar", lower-case, and
# keep the first five characters.
wave_label <- addfile
for (fragment in c("tocode/", "coded/", "openend")) {
    wave_label <- sub(fragment, "", wave_label)
}
wave_label <- sub("march", "mar", wave_label)
text$date <- substr(tolower(wave_label), 1, 5)

text$row <- row.names(text)

# Append to the responses read from the individual text files; rbind() on
# data frames matches columns by name, so the differing column order is fine.
df <- rbind(df, text)

cat("\nFormatting text data frame..\n")

# Wave label taken from the file names -> first day of that wave's month.
wave_start <- c(
    jul09 = "2009-07-01",
    nov09 = "2009-11-01",
    may10 = "2010-05-01",
    oct10 = "2010-10-01",
    mar11 = "2011-03-01",
    jul11 = "2011-07-01",
    nov11 = "2011-11-01",
    mar13 = "2013-03-01",
    dec13 = "2013-12-01",
    mar14 = "2014-03-01",
    mar15 = "2015-03-01",
    jan16 = "2016-01-01",
    jul16 = "2016-07-01",
    oct16 = "2016-10-01"
)
## july 11 and nov 11?
# (open question carried over from the original author)
# Recode the labels as factor levels, then parse the result as Date.
df$date <- as.Date(dplyr::recode(factor(df$date), !!!wave_start))


# TRUE when the response text contains "medicare" in any letter case.
df$medicare <- grepl("medicare", df$text, ignore.case = TRUE)





#### merge KFF demos and text
####
# Build the join keys on the text side: `date` as character (to match
# sdat$date) and a numeric PSRAID parsed from the file-name piece `psraid`.
df$date <- as.character(df$date)
df$PSRAID <- as.numeric(df$psraid)

# Keep every text response; attach respondent columns from `sdat` where the
# (PSRAID, date) pair matches.
df <- left_join(df, sdat, by = c("PSRAID", "date"))




#### aggregate and plot
####
# Fraction of responses mentioning Medicare, per survey date, among rows with
# min_age <= AGEN < max_age. Rows with missing AGEN are dropped.
#
# dplyr verbs are namespace-qualified so the result no longer depends on
# dplyr being attached after plyr (both export mutate() and summarise()).
#
# responses: data frame with columns AGEN, date and medicare.
# min_age:   inclusive lower age bound.
# max_age:   exclusive upper age bound; defaults to no upper bound.
# Returns a two-column tibble (date, medicare) with one row per date.
medicare_share_by_date <- function(responses, min_age, max_age = Inf) {
    responses %>%
        dplyr::filter(AGEN >= min_age, AGEN < max_age) %>%
        dplyr::mutate(date = as.Date(date)) %>%
        dplyr::group_by(date) %>%
        dplyr::summarise(medicare = mean(medicare))
}

# Respondents aged 65 or over.
agg <- medicare_share_by_date(df, min_age = 65)

# Comparison group: respondents aged 50 to 64.
agg_compare <- medicare_share_by_date(df, min_age = 50, max_age = 65)


# Figure A7: share of open-ended responses mentioning Medicare over time,
# for respondents 65+ (solid black) and 50-64 (dashed purple).
#
# The path is built like every other path in this script (prefix + relative
# path). The former leading "/" sent the file to the filesystem root
# ("/figs/...") whenever `the_prefix` was left empty.
fig_path <- paste0(
    the_prefix,
    "figs/figureA7_medicare_openended_mentions.pdf"
)
# pdf() fails if the target directory does not exist yet.
dir.create(dirname(fig_path), showWarnings = FALSE, recursive = TRUE)

pdf(fig_path, width = 5, height = 3.5)
# Close the device even if a plotting call errors, so a failed run does not
# leave a dangling graphics device and a locked, half-written PDF.
invisible(tryCatch(
    {
        par(mar = c(5, 5, 2, 1))
        plot(
            agg, type = "l", ylim = c(0, 0.5),
            xlab = "Open-Ended Survey Date",
            ylab = "Fraction of Open-Ended Responses\nMentioning Medicare",
            bty = "n"
        )
        points(agg, pch = 16)
        lines(agg_compare, col = "purple", lty = 2)
        points(agg_compare, pch = 16, col = "purple")
        legend(
            "topright",
            legend = c("65 or over", "50 to 64"),
            col = c("black", "purple"),
            lty = c(1, 2),
            bty = "n"
        )
    },
    finally = dev.off()
))
